# Read in data
stop_frisk_df = 
  # Read in data from internet
  GET("https://www1.nyc.gov/assets/nypd/downloads/excel/analysis_and_planning/stop-question-frisk/sqf-2016.csv") %>% 
  content("parsed") %>% 
  
  # Clean and fix names of columns
  janitor::clean_names() %>% 
  rename(
    precinct = pct,
    date_stop = datestop,
    time_stop = timestop,
    stop_in_out = inout,
    obs_time_min = perobs,
    stop_time_min = perstop,
    arst_made = arstmade,
    off_in_unif = offunif,
    hair_col = haircolr,
    eye_col = eyecolor,
    other_feature = othfeatr,
    boro = city
  )  %>% 
  mutate(
    # Combine height columns
    height_inch = ht_feet * 12 + ht_inch,
    # Convert date to proper format
    date_stop = mdy(date_stop),
    # Convert time to proper format
    time_stop = hm(time_stop / 100),
    # Recode to be more informative
    stop_in_out = recode(stop_in_out, "I" = "inside", "O" = "outside"),
    race = recode(
      race, 
      "A" = "other", 
      "B" = "black", 
      "I" = "other",
      "P" = "black-hispanic",
      "Q" = "white-hispanic",
      "W" = "white",
      "U" = "other",
      "Z" = "other"
    ),
    hair_col = recode(
      hair_col,
      "BA" = "bald",
      "BK" = "black",
      "BL" = "blond",
      "BR" = "brown",
      "DY" = "other",
      "FR" = "other",
      "GY" = "other",
      "RA" = "other",
      "SN" = "other",
      "SP" = "other",
      "WH" = "other",
      "XX" = "other",
      "ZZ" = "other",
    ),
    eye_col = recode(
      eye_col,
      "BK" = "black",
      "BL" = "blue",
      "BR" = "brown",
      "DF" = "other",
      "GR" = "other",
      "GY" = "other",
      "HA" = "other",
      "MA" = "other",
      "PK" = "other",
      "VI" = "other",
      "XX" = "other",
      "Z" = "other",      
    ),
    build = recode(
      build,
      "H" = "heavy",
      "M" = "medium",
      "T" = "thin",
      "U" = "other",
      "Z" = "unknown"
    ),
    # change boro columns to lowercase for consistency
    boro = tolower(boro),
    # change character datatypes to numeric
    age = as.numeric(age),
    obs_time_min = as.numeric(obs_time_min),
    stop_time_min = as.numeric(stop_time_min)
  )  %>% 
  # select columns for further analysis
  select(precinct, date_stop, time_stop, stop_in_out, obs_time_min, stop_time_min, arst_made, off_in_unif, frisked, 
         searched, rf_vcrim, rf_othsw, rf_attir:ac_evasv, cs_furtv:cs_other, rf_knowl, sb_hdobj:sb_admis, rf_furt, 
         rf_bulg, sex, race, age, height_inch, weight:build, boro, xcoord, ycoord) %>% 
  # change all columns that have Y/N to 1/0
  mutate_at(vars(arst_made:rf_bulg), funs(recode(., "Y" = "1", "N" = "0"))) %>% 
  # change binary columns to numeric instead of character
  mutate_at(vars(arst_made:rf_bulg), funs(as.numeric(.))) %>% 
  # converts all character variables to factors (this does the same as the for loop)
  mutate_if(is.character, as.factor) %>% 
  # remove the single row of NAs
  filter(!is.na(build))

Converting X Y Coordinates

test_df= 
  stop_frisk_df %>% 
  select(xcoord, ycoord) %>% 
  drop_na()

coordinates(test_df) <- c("xcoord","ycoord")
proj4string(test_df) <- CRS("+init=epsg:2263")
CRS.new <- CRS("+init=epsg:4326") 
test_df_new <- spTransform(test_df, CRS.new)
test_df_new <- data.frame(longitude = coordinates(test_df_new)[,1], latitude = coordinates(test_df_new)[,2])

Adding Precinct Data & Converting X/Y Coords

nypd_prec = 
  st_read("./data/nypp/geo_export_55605c77-3922-4b7a-bd65-18d11225a91f.shp")
## Reading layer `geo_export_55605c77-3922-4b7a-bd65-18d11225a91f' from data source `/Users/laurenfranks/Documents/Columbia/Data Science/final project/group_13_final_proj/data/nypp/geo_export_55605c77-3922-4b7a-bd65-18d11225a91f.shp' using driver `ESRI Shapefile'
## Simple feature collection with 77 features and 3 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -74.25559 ymin: 40.49612 xmax: -73.70001 ymax: 40.91553
## epsg (SRID):    4326
## proj4string:    +proj=longlat +ellps=WGS84 +no_defs
long_lat_df =
  stop_frisk_df %>% 
  select(xcoord, ycoord) %>% 
  drop_na()

coordinates(long_lat_df) <- c("xcoord", "ycoord")
proj4string(long_lat_df) <- CRS("+init=epsg:2263")
long_lat_df <- spTransform(long_lat_df, CRS("+init=epsg:4326"))
long_lat_df <- data.frame(longitude = coordinates(long_lat_df)[,1], latitude = coordinates(long_lat_df)[,2])

stop_frisk_map_df = 
  stop_frisk_df %>% 
  drop_na(xcoord) %>% 
  bind_cols(long_lat_df) %>% 
  select(-xcoord, -ycoord) %>% 
  mutate(
    sex = recode(sex, "M" = "male", "F" = "female"),
    label = str_c("Gender: ", sex, "<br/>", "Race: ", race, "<br/>", "Age: ", age, "<br/>", "Build: ", build) %>% map(htmltools::HTML))

Creating Maps

Basic Location Maps

# Locations of Stops
stop_frisk_map_df %>% 
  leaflet() %>% 
  addTiles() %>% 
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addCircleMarkers(
    ~longitude, ~latitude,
    radius = 1,
    opacity = 0.6,
    label = ~label)
# Locations of Frisks
stop_frisk_map_df %>% 
  filter(frisked==1) %>% 
  leaflet() %>% 
  addTiles() %>% 
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addCircleMarkers(
    ~longitude, ~latitude,
    radius = 1,
    opacity = 0.6,
    label = ~label)
# Locations of Stops with Precincts Outlines
stop_frisk_map_df %>% 
  leaflet() %>% 
  addTiles() %>% 
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(data = nypd_prec,
              color = "#000000",
              fillColor = "#FFFFFF",
              weight = 1,
              smoothFactor = 0.5,
              opacity = 1,
              fillOpacity = 0.1) %>% 
  addCircleMarkers(
    ~longitude, ~latitude,
    radius = 1,
    opacity = 0.6,
    label = ~label) 
# Locations of Frisks with Precincts Outlines
stop_frisk_map_df %>% 
  filter(frisked==1) %>% 
  leaflet() %>% 
  addTiles() %>% 
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(data = nypd_prec,
              color = "#000000",
              fillColor = "#FFFFFF",
              weight = 1,
              smoothFactor = 0.5,
              opacity = 1,
              fillOpacity = 0.1) %>% 
  addCircleMarkers(
    ~longitude, ~latitude,
    radius = 1,
    opacity = 0.6,
    label = ~label)

Amount of Stops/Frisks/Arrests by Precinct

# Colored Area based on Number of Stops with Information on # of Frisks by Precinct
stops_by_prec = 
  stop_frisk_df %>% 
  group_by(precinct) %>% 
  summarize(totalstops = n()) %>% 
  drop_na()

frisks_by_prec = 
  stop_frisk_df %>% 
  group_by(precinct) %>% 
  filter(frisked==1) %>% 
  summarize(totalfrisks = n())

friskorsearch_by_prec = 
  stop_frisk_df %>% 
  group_by(precinct) %>% 
  filter(frisked==1|searched==1) %>% 
  summarize(totalfriskorsearch = n())

arrests_by_prec = 
  stop_frisk_df %>% 
  group_by(precinct) %>% 
  filter(arst_made==1) %>% 
  summarize(totalarrests = n())

nypd_prec  = 
  inner_join(nypd_prec, stops_by_prec, by="precinct") %>% 
  inner_join(., frisks_by_prec, by="precinct") %>% 
  inner_join(., arrests_by_prec, by="precinct") %>% 
  inner_join(., friskorsearch_by_prec, by="precinct")

pal <- colorNumeric("YlOrRd", domain = range(nypd_prec$totalstops))
pal2 <- colorNumeric("YlOrRd", domain = range(nypd_prec$totalfrisks))
pal3 <- colorNumeric("YlOrRd", domain = range(nypd_prec$totalarrests))
pal4 <- colorNumeric("YlOrRd", domain = range(nypd_prec$totalfriskorsearch))

nypd_prec =
nypd_prec %>% 
  mutate(
    tsc = pal(totalstops),
    tfc = pal2(totalfrisks),
    tac = pal3(totalarrests),
    tfsc = pal4(totalfriskorsearch),
    label = str_c("Precinct: ", precinct, "<br/>", "Total Stops in 2016: ", totalstops, "<br/>", "Total Frisks or Searches in 2016: ", totalfriskorsearch, "<br/>", "Total Arrests in 2016: ", totalarrests) %>% map(htmltools::HTML)
  )

Maps by Precinct

# Total Stops by Precinct
nypd_prec %>% 
  leaflet() %>% 
  addTiles() %>%
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(label = ~label,
              color = "#000000",
              fillColor = ~tsc,
              weight = 1,
              smoothFactor = 0.5,
              opacity = 1,
              fillOpacity = 0.75,
              highlightOptions = highlightOptions(color = "white",
                                                  weight = 2,
                                                  bringToFront = TRUE)) %>% 
  addLegend(pal = pal,
            values = ~totalstops,
            title = "Total Number of Stops",
            position = "bottomright")
# Total Frisks or Searches by Precinct
nypd_prec %>% 
  leaflet() %>% 
  addTiles() %>%
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(label = ~label,
              color = "#000000",
              fillColor = ~tfsc,
              weight = 1,
              smoothFactor = 0.5,
              opacity = 1,
              fillOpacity = 0.75,
              highlightOptions = highlightOptions(color = "white",
                                                  weight = 2,
                                                  bringToFront = TRUE)) %>% 
  addLegend(pal = pal4,
            values = ~totalfriskorsearch,
            title = "Total Number of Frisks or Searches",
            position = "bottomright")
# Total Arrests by Precinct
nypd_prec %>% 
  leaflet() %>% 
  addTiles() %>%
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(label = ~label,
              color = "#000000",
              fillColor = ~tac,
              weight = 1,
              smoothFactor = 0.5,
              opacity = 1,
              fillOpacity = 0.75,
              highlightOptions = highlightOptions(color = "white",
                                                  weight = 2,
                                                  bringToFront = TRUE)) %>% 
  addLegend(pal = pal3,
            values = ~totalarrests,
            title = "Total Number of Arrests",
            position = "bottomright")

Additing Most Frequent Reason for Stops by Precinct

stop_reason_total =
stop_frisk_df %>% 
  select(precinct, cs_objcs:cs_lkout, cs_cloth, cs_drgtr, cs_furtv, cs_vcrim:cs_other, frisked) %>% 
  pivot_longer(
    cs_objcs:cs_other,
    names_to = "reason_stopped",
    values_to = "stops"
  ) %>% 
  mutate(
    reason_stopped = recode(
    reason_stopped,
    "cs_objcs" = "carrying suspicious object",
    "cs_descr" = "fits a relevant description",
    "cs_casng" = "casing a victim or location",
    "cs_lkout" = "suspect acting as a lookout",
    "cs_cloth" = "wearing clothes commonly used in crimes",
    "cs_drgtr" = "actions indicative of drug transaction",
    "cs_furtv" = "furtive movements",
    "cs_vcrim" = "actions engaging in violent crime",
    "cs_bulge" = "suspcious bulge",
    "cs_other" = "other"
  )) %>% 
  filter(stops == 1) %>% 
  group_by(reason_stopped, precinct) %>% 
  summarize(total =n()) %>% 
  ungroup() %>% 
  group_by(precinct) %>% 
  top_n(1, total) %>% 
  mutate(greatreasonstop = reason_stopped) %>% 
  select(precinct, greatreasonstop)

nypd_prec = left_join(nypd_prec, stop_reason_total, by="precinct")

nypd_prec =
nypd_prec %>% 
  mutate(
    label = str_c("Precinct: ", precinct, "<br/>", "Total Stops in 2016: ", totalstops, "<br/>", "Total Frisks or Searches in 2016: ", totalfriskorsearch, "<br/>", "Total Arrests in 2016: ", totalarrests, "<br/>", "Most Frequent Reason for Stop: ", greatreasonstop) %>% map(htmltools::HTML)
  )

Heatmap of Stops by Race

stops_by_race = 
  left_join(stop_frisk_map_df, stops_by_prec, by="precinct") %>% 
  group_by(precinct, race) %>% 
  summarize(stops_by_race = n()) %>% 
  left_join(., stops_by_prec, by="precinct") %>% 
  mutate(
    percentstops = (stops_by_race/totalstops)*100
  ) 

palfact <- colorFactor("viridis", domain = stops_by_race$race)

max_stops_by_race = 
  stops_by_race %>% 
  group_by(precinct) %>% 
  top_n(1, percentstops) %>% 
  mutate(maxstoppedrace = race) %>% 
  select(precinct, maxstoppedrace)

nypd_prec = left_join(nypd_prec, max_stops_by_race, by="precinct")

nypd_prec %>% 
  leaflet() %>% 
  addTiles() %>% 
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(label = ~maxstoppedrace,
              color = "#000000",
              fillColor = ~palfact(maxstoppedrace),
              weight = 1,
              smoothFactor = 0.5,
              opacity = 1,
              fillOpacity = 0.75
  ) %>% 
   addLegend(pal = palfact,
            values = ~maxstoppedrace,
            title = "Race Making Up the Largest <br/>
            Percentage Stops",
            position = "bottomright")

Combining to Try Layering Functioning (but this can be accomplished with Shiny as well)

stop_frisk_map_df %>% 
  leaflet() %>% 
  addTiles() %>% 
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addCircleMarkers(
    ~longitude, ~latitude,
    radius = 1,
    opacity = 0.6,
    group = "Individual Stops") %>% 
  addPolygons(data = nypd_prec,
              label = ~label,
              color = "#000000",
              fillColor = ~tfc,
              weight = 1,
              smoothFactor = 0.5,
              opacity = 1,
              fillOpacity = 0.75,
              group = "Police Precincts"
  ) %>% 
  addLegend(data = nypd_prec,
            pal = pal,
            values = ~totalstops,
            title = "Total Number of Stops",
            position = "bottomright",
            group = "Police Precincts"
  ) %>% 
  addLayersControl(
    overlayGroups = c("Individual Stops", "Police Precincts"),
    options = layersControlOptions(collapsed = FALSE)
  )

Testing Additional Layers for Shiny

nypd_prec %>% 
  leaflet() %>% 
  addTiles() %>% 
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addCircleMarkers(data = stop_frisk_map_df,
                   ~longitude, ~latitude,
                   radius = 1,
                   opacity = 0.6,
                   label = ~label,
                   group = "Individual Stops") %>% 
  addPolygons(label = ~label,
              color = "#000000",
              fillColor = ~tac,
              weight = 1,
              smoothFactor = 0.5,
              opacity = 1,
              fillOpacity = 0.75,
              group = "Police Precinct Info") %>% 
  addLegend(pal = pal3,
            values = ~totalarrests,
            title = "Total Number of Arrests",
            position = "bottomright",
            group = "Police Precinct Info") %>% 
  addPolylines(label = ~maxstoppedrace,
              color = ~palfact(maxstoppedrace),
              weight = 1.5,
              smoothFactor = 0.5,
              opacity = 1,
              fillOpacity = 0.5,
              highlightOptions = highlightOptions(weight = 4,
                                                  bringToFront = TRUE),
              group = "Race of Largest Percentage of Stops") %>% 
   addLegend(pal = palfact,
            values = ~maxstoppedrace,
            title = "Race Making Up the Largest <br/>
            Percentage Stops",
            position = "bottomright",
            group = "Race of Largest Percentage of Stops") %>% 
  addLayersControl(overlayGroups = c("Individual Stops", "Police Precinct Info", "Race of Largest Percentage of Stops"),
                   options = layersControlOptions(collapsed = FALSE)) %>% 
  hideGroup(c("Police Precinct Info", "Race of Largest Percentage of Stops"))

PLACE TO HOLD EXTRA CODE

checkboxInput("ind_stops", "Individual Stops", value = FALSE)
checkboxInput("pol_prec", "Police Precinct Info", value = FALSE)
checkboxInput("race_stops", "Race and Percentage of Stops", value = FALSE)


checkboxGroupInput("variable", label = h3("Map Options:"),
                   c("Individual Stops" = "ind_stops",
                     "Police Precinct Info" = "pol_prec",
                     "Race and Percentage of Stops" = "race_stops"))

selectInput("heatmap", label = h3("Heat Map Options:"),
                   choices = c("Number of Stops" = "~tsc", 
                               "Number of Frisks/Searches" = "~tfsc", 
                               "Number of Arrests" = "~tac"),
                   selected = "Number of Stops")
output$map <- renderLeaflet({
  map <- nypd_prec %>% 
  leaflet() %>% 
  addTiles() %>% 
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addCircleMarkers(data = stop_frisk_map_df,
                   ~longitude, ~latitude,
                   radius = 1,
                   opacity = 0.6,
                   label = ~label,
                   group = "Individual Stops") %>% 
  addPolygons(label = ~label,
              color = "#000000",
              fillColor = ~tac,
              weight = 1,
              smoothFactor = 0.5,
              opacity = 1,
              fillOpacity = 0.75,
              group = "Police Precinct Info") %>% 
  addLegend(pal = pal3,
            values = ~totalarrests,
            title = "Total Number <br/> of Arrests",
            position = "topleft",
            group = "Police Precinct Info") %>% 
  addPolylines(label = ~maxstoppedrace,
              color = ~palfact(maxstoppedrace),
              weight = 1.5,
              smoothFactor = 0.5,
              opacity = 1,
              fillOpacity = 0.5,
              highlightOptions = highlightOptions(weight = 4,
                                                  bringToFront = TRUE),
              group = "Race and Percentage of Stops") %>% 
   addLegend(pal = palfact,
            values = ~maxstoppedrace,
            title = "Race Making Up Largest <br/> Percentage Stops",
            position = "bottomright",
            group = "Race and Percentage of Stops") %>% 
  addLayersControl(overlayGroups = c("Individual Stops", "Police Precinct Info", "Race and Percentage of Stops"),
                   options = layersControlOptions(collapsed = FALSE)) %>% 
  hideGroup(c("Individual Stops", "Police Precinct Info", "Race and Percentage of Stops")) 
  
})

leafletOutput('map')

EXTRA SHINY CODE

Row

Number of Stops by Month

renderPlot({
  
  if (input[["analysis_type"]] == "Individual Borough") {
  
    stop_frisk_df %>% 
      filter(
        boro == input[["boro_choice"]]
      ) %>% 
      mutate(
        month_stop = month(date_stop)
      ) %>% 
      filter(
        month_stop != is_null(month_stop)
      ) %>%   
      group_by(month_stop) %>% 
      summarize(
        count = n()
      ) %>% 
      ggplot(aes(x = month_stop, y = count)) + 
      geom_point() +
      geom_smooth(se = FALSE) +
      labs(
        x = "Month",
        y = "Number of Stops"
      )
  
  } else {
    
    stop_frisk_df %>% 
      mutate(
        month_stop = month(date_stop)
      ) %>% 
      filter(
        month_stop != is_null(month_stop)
      ) %>%   
      group_by(month_stop) %>% 
      summarize(
        count = n()
      ) %>% 
      ggplot(aes(x = month_stop, y = count)) + 
      geom_point() +
      geom_smooth(se = FALSE) +
      labs(
        x = "Month",
        y = "Number of Stops"
      )
    
  }
  
})

Number of Stops by Hour

renderPlot({
  
  if (input[["analysis_type"]] == "Individual Borough") {
  
    stop_frisk_df %>% 
      filter(
        boro == input[["boro_choice"]]
      ) %>% 
      mutate(
        hour_stop = hour(time_stop)
      ) %>% 
      group_by(hour_stop) %>% 
      summarize(
        count = n()
      ) %>% 
      ggplot(aes(x = hour_stop, y = count)) + 
      geom_point() +
      geom_smooth(se = FALSE) +
      labs(
        x = "Hour",
        y = "Number of Stops"
      )
  
  } else {
    
    stop_frisk_df %>% 
      mutate(
        hour_stop = hour(time_stop)
      ) %>% 
      group_by(hour_stop) %>% 
      summarize(
        count = n()
      ) %>% 
      ggplot(aes(x = hour_stop, y = count)) + 
      geom_point() +
      geom_smooth(se = FALSE) +
      labs(
        x = "Hour",
        y = "Number of Stops"
      )
    
  }
  
})